## read in 2014 BG level turnout
files <- list.files("temp", pattern = "^14_ballots_by_bg", full.names = T)

ballots_14 <- rbindlist(lapply(files, readRDS)) %>%
  select(GEOID, ballots_pre = ballots) %>%
  mutate(year = "2016")

## read in 2016 BG level turnout
files <- list.files("temp", pattern = "^16_ballots_by_bg", full.names = T)

ballots_16 <- rbindlist(lapply(files, readRDS)) %>%
  select(GEOID, ballots) %>%
  mutate(year = "2016")

ballots_16 <- left_join(ballots_16, ballots_14)

## read in 2018 BG level turnout
files <- list.files("temp", pattern = "^18_ballots_by_bg", full.names = T)

ballots_18 <- rbindlist(lapply(files, readRDS)) %>%
  select(GEOID, ballots_pre = ballots) %>%
  mutate(year = "2020")

## read in 2020 BG level turnout
files <- list.files("temp", pattern = "^ballots_by_bg", full.names = T)

ballots_20 <- rbindlist(lapply(files, readRDS)) %>%
  select(GEOID, ballots) %>%
  mutate(year = "2020")

ballots_20 <- left_join(ballots_20, ballots_18)

## combine all years' turnout in long format
ballots <- bind_rows(ballots_16, ballots_20)
## remove unnecessary files from memory
cleanup("ballots")

########################### cvap
### read in block group cvap from each year

cvap14 <- fread("raw_data/CVAP_2010-2014_ACS_csv_files/BlockGr.csv") %>%
  filter(lntitle == "Total") %>%
  mutate(GEOID = substring(geoid, 8),
         year = "2016") %>%
  select(GEOID, cvap_pre = CVAP_EST, year)

cvap16 <- fread("raw_data/CVAP_2012-2016_ACS_csv_files/BlockGr.csv") %>%
  filter(lntitle == "Total") %>%
  mutate(GEOID = substring(geoid, 8),
         year = "2016") %>%
  select(GEOID, cvap = CVAP_EST, year)

cvap16 <- left_join(cvap16, cvap14)

cvap18 <- fread("raw_data/CVAP_2014-2018_ACS_csv_files/BlockGr.csv") %>%
  filter(lntitle == "Total") %>%
  mutate(GEOID = substring(geoid, 8),
         year = "2020") %>%
  select(GEOID, year, cvap_pre = cvap_est)

cvap20 <- fread("raw_data/CVAP_2015-2019_ACS_csv_files/BlockGr.csv") %>%
  filter(lntitle == "Total") %>%
  mutate(GEOID = substring(geoid, 8),
         year = "2020") %>%
  select(GEOID, year, cvap = cvap_est)

cvap20 <- left_join(cvap20, cvap18)

cvap <- bind_rows(cvap16, cvap20)


## merge cvap and ballot count data
ballots <- inner_join(ballots, cvap)
cleanup("ballots")
########################### census

##read in ACS census demographic info for each BG for each year

c16 <- readRDS("temp/census_bgs_16.rds") %>%
  mutate(year = "2016") %>%
  select(GEOID, year, latino, asian, nh_white, nh_black, median_income, median_age,
         pop_dens, some_college, population)

c20 <- readRDS("temp/census_bgs_19.rds") %>%
  mutate(year = "2020") %>%
  select(GEOID, year, latino, asian, nh_white, nh_black, median_income, median_age,
         pop_dens, some_college, population)
census <- bind_rows(c16, c20)

## merge census data in, keep only primary data. save 2020 data
ballots <- inner_join(ballots, census)
saveRDS(filter(ballots, year == 2020), "temp/to_ey.rds")

## temperature data
clim <- fread("raw_data/climdiv-tmpccy-v1.0.0-20250506.txt")

lu <- data.table(state = c("Alabama", "Arizona", "Arkansas", "California", "Colorado", "Connecticut",
                           "Delaware", "Florida", "Georgia", "Idaho", "Illinois", "Indiana", "Iowa",
                           "Kansas", "Kentucky", "Louisiana", "Maine", "Maryland", "Massachusetts",
                           "Michigan", "Minnesota", "Mississippi", "Missouri", "Montana", "Nebraska",
                           "Nevada", "New Hampshire", "New Jersey", "New Mexico", "New York",
                           "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon",
                           "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota",
                           "Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington",
                           "West Virginia", "Wisconsin", "Wyoming", "Hawaii", "Alaska"),
                 scode = sprintf("%02d", 1:50))

lu <- left_join(lu, fips_codes |> 
                  select(state = state_name, state_code) |> 
                  group_by(state) |> 
                  filter(row_number() == 1))

colnames(clim) <- c("d", paste0("m", c(1:12)))

clim <- mutate(clim,
               d = str_pad(trimws(d), width = 11, pad = "0", side = "left"),
               year = substring(d, 8),
               county = substring(d, 3, 5),
               scode = substring(d, 1, 2)) |> 
  select(-d) |> 
  filter(year %in% c(2016:2021)) |> 
  pivot_longer(starts_with("m")) |> 
  group_by(year, county, scode) |> 
  summarize(av_temp = mean(value)) |> 
  left_join(lu) |> 
  mutate(county = paste0(state_code, county)) |> 
  select(-scode, -state_code, -state)


ballots <- ballots |> 
  mutate(county = substring(GEOID, 1, 5)) |> 
  left_join(clim)

## covid counts (cumulative by day, from NYT)

covid <- fread("raw_data/us-counties.csv") |> 
  filter(date <= "2020-11-03") |> 
  group_by(fips) |> 
  filter(date == max(date)) |> 
  select(county = fips, cases) |> 
  mutate(county = str_pad(county, pad = "0", side = "left", width = 5))

county_pop <- get_acs(geography = "county",
                                        variables = "B03002_001",
                                        year = 2020) %>%
  select(GEOID, population = estimate)

covid <- left_join(covid, select(county_pop, county = GEOID, population)) |> 
  mutate(cases = cases / population) |> 
  select(-population)

ballots <- left_join(ballots, covid)

## group quarters data

group_quarters <- get_acs("county", "B09019_026", summary_var = "B09019_001", year = 2019) |> 
  mutate(share_group = estimate / summary_est) |> 
  select(county = GEOID, share_group)

ballots <- left_join(ballots, group_quarters)

### eavs vote mode data

eavs_2016 <- read_xlsx("raw_data/eavs_2016/EAVS_2016_for_Public_Release_V1.1_0.xlsx") |> 
  mutate(across(c(F1a, F1b), as.numeric)) |> 
  filter(is.finite(F1b), is.finite(F1a),
         F1b >= 0, F1a > 0) |> 
  mutate(county = substring(FIPSCode, 1, 5),
         year = "2016") |> 
  group_by(county, year) |> 
  summarize(in_person_rate = sum(F1b, na.rm = T) / sum(F1a, na.rm = T))

eavs_2020 <- fread("raw_data/eavs_2020/2020_EAVS_for_Public_Release_nolabel_V1.2_CSV.csv") |> 
  mutate(across(c(F1a, F1b), as.numeric)) |> 
  filter(is.finite(F1b), is.finite(F1a),
         F1b >= 0, F1a > 0) |> 
  mutate(county = substring(FIPSCode, 1, 5),
         year = "2020") |> 
  group_by(county, year) |> 
  summarize(in_person_rate = sum(F1b, na.rm = T) / sum(F1a, na.rm = T))

eavs <- bind_rows(eavs_2016, eavs_2020)

ballots <- left_join(ballots, eavs)

## contact rates (from mrp code, already run)

contact <- bind_rows(
  readRDS("temp/mrp_2016.rds") |> 
    select(county = county_fips,
           contact = p_mrp_est) |> 
    mutate(year = "2016"),
  readRDS("temp/mrp_2020.rds") |> 
    select(county = county_fips,
           contact = p_mrp_est) |> 
    mutate(year = "2020")
)

ballots <- left_join(ballots, contact)

## homicides (from code already run)

homicides <- bind_rows(
  readRDS("temp/homicides_2016.rds"),
  readRDS("temp/homicides_2020.rds")
) |> 
  mutate(year = as.character(year))

ballots <- left_join(ballots, homicides, by = c("county" = "FIPS", "year"))

saveRDS(ballots, "temp/full_demos.rds")
ballots <- readRDS("temp/full_demos.rds")
cleanup("ballots")

####################

## read in dists for each BG
dists <- readRDS("temp/dists_long_new_from_gva.rds") %>% 
  mutate(year = as.character(floor(year(date) / 2) * 2))

## calculate turnout, set to 100% if over 100%
dists <- inner_join(dists, ballots) %>% 
  mutate(turnout = ballots / cvap,
         turnout_pre = ballots_pre / cvap_pre,
         pre = (year == "2016" & date <= "2016-11-08") | (year == "2020" & date <= "2020-11-03")) %>% 
  mutate_at(vars(starts_with("turnout")), ~ ifelse(. > 1, 1, .))


#############################
#############################
#############################
#############################
## dem share

dem_share <- rbindlist(lapply(
  list.files("temp", pattern = "^[a-z]{2}_bg_prec_\\d{2}", full.names = T),
  readRDS)) |> 
  mutate(year = as.character(year))

dists <- left_join(dists, dem_share)

##### SAVE PRIMARY DATASET FOR RDITS!!!
saveRDS(dists, "temp/shooting_demos.rds")


####################################################
####################################################
####################################################

dists <- readRDS("temp/shooting_demos.rds") |> 
  mutate(date = as.Date(date),
         id = id4, ## id (and distance, in next line) of nearest 4-victim shooting
         dist = dist4) |> 
  filter(!is.na(id),
         id %in% (readRDS("temp/geocoded_shootings_from_gva.rds") |> 
                    filter(home + dv == 0))$id ## drop if in the home or domestic violence
  )

dists$state_code <- substring(dists$GEOID, 1, 2)

dists <- left_join(dists,
                   fips_codes |> 
                     select(state, state_code) |> 
                     unique()) |> 
  left_join(data.table(state = state.abb,
                       region = state.region)) |> 
  mutate(region = ifelse(state_code == 11, 2, region),
         northeast = region == 1,
         south = region == 2,
         northcentral = region == 3,
         west = region == 4) |> 
  select(-state, -state_code, -region)

### loop over thresholds for RDITS
run_rdd <- function(threshold){
  library(data.table)
  library(tidyverse)
  library(rdrobust)
  print(threshold)
  ##keep the observations within the threshold closest to election day
  ## using data.table notation to speed this up
  set_pre <- dists[dists$dist <= threshold & dists$pre,
                   .SD[date %in% max(date)], by=list(year, GEOID)]
  set_pre <- set_pre[, .SD[1], by = .(year, GEOID)] %>% 
    mutate(treated = T)
  
  ##keep the observations within the threshold closest to election day
  set_post <- dists[dists$dist <= threshold & !dists$pre,
                    .SD[date %in% min(date)], by=list(year, GEOID)]
  set_post <- set_post[!(paste0(set_post$GEOID, set_post$year) %in% paste0(set_pre$GEOID, set_pre$year)),
                       .SD[1], by = .(year, GEOID)] %>% 
    mutate(treated = F)
  
  full_treat <- bind_rows(set_pre, set_post) %>% 
    select(GEOID, id, date, dist, year, turnout,
           median_income, nh_white, nh_black, median_age, pop_dens, latino, asian,
           some_college, turnout_pre, treated, population, av_temp, cases, share_group,
           in_person_rate, contact, homicide, northeast, south, northcentral) %>% 
    mutate(d2 = ifelse(year == "2020", as.integer(date - as.Date("2020-11-03")),
                       as.integer(date - as.Date("2016-11-08"))),
           t16 = year == "2016")
  
  if(threshold == 0.5){
    saveRDS(full_treat, "temp/primary_half.rds")
  }
  ########################################
  
  
  l <- rdrobust(y = full_treat$turnout, x = full_treat$d2, p = 1, c = 0, cluster = full_treat$id,
                covs = select(full_treat,
                              latino, nh_white, asian,
                              nh_black, median_income, median_age,
                              pop_dens, turnout_pre, t16, some_college))
  
  l2 <- rdrobust(y = full_treat$turnout, x = full_treat$d2, p = 1, c = 0, cluster = full_treat$id,
                 covs = select(full_treat,
                               latino, nh_white, asian,
                               nh_black, median_income, median_age,
                               pop_dens, t16, some_college))
  
  l3 <- rdrobust(y = full_treat$turnout, x = full_treat$d2, p = 1, c = 0, cluster = full_treat$id)
  
  l4 <- rdrobust(y = full_treat$turnout, x = full_treat$d2, p = 1, c = 0, cluster = full_treat$id,
                covs = select(full_treat,
                              latino, nh_white, asian,
                              nh_black, median_income, median_age,
                              pop_dens, turnout_pre, t16, some_college, northeast, south, northcentral))
  
  pop <- full_treat |> 
    filter(d2 <= 0,
           abs(d2) < l[["bws"]][1]) |> 
    summarize(population = sum(population)) |> 
    pull()
  
  pop_week <- full_treat |> 
    filter(d2 <= 0,
           abs(d2) < 7) |> 
    summarize(population = sum(population)) |> 
    pull()
  
  f <- tibble(coef = l$coef,
              se = l$se, 
              pv = l$pv,
              p = threshold,
              n_pre = l$N_h[1],
              n_post = l$N_h[2],
              n = l$N_h[1] + l$N_h[2],
              bw = l[["bws"]][1],
              u = l[["ci"]][,2],
              l = l[["ci"]][,1],
              population_treated = pop,
              population_week = pop_week,
              
              coef_no_prior = l2$coef,
              u_no_prior = l2[["ci"]][,2],
              l_no_prior = l2[["ci"]][,1],
              
              coef_no_covs = l3$coef,
              u_no_covs = l3[["ci"]][,2],
              l_no_covs = l3[["ci"]][,1],
              
              coef_region = l4$coef,
              u_region = l4[["ci"]][,2],
              l_region = l4[["ci"]][,1])
}

cl <- makeCluster(12)  
registerDoParallel(cl)

clusterExport(cl, list("run_rdd", "dists"))
out <- rbindlist(parLapply(cl, c(0.25, 0.5, 0.75, seq(1, 10, 1), seq(12, 20, 2)), fun = run_rdd))

out$estimate <- rep(c('Traditional','Bias-Adjusted','Robust'),nrow(out)/3)

out <- mutate_at(out, vars(coef, l, u,
                           coef_no_prior,
                           u_no_prior,
                           l_no_prior,
                           
                           coef_no_covs,
                           u_no_covs,
                           l_no_covs,
                           
                           coef_region,
                           u_region,
                           l_region), ~. * -1)

out$estimate <- factor(out$estimate, levels = c('Traditional','Bias-Adjusted','Robust'))

saveRDS(out, "temp/primary_out_data.rds")
## create figure 3, s4, s5

for(r in c("primary", "no_covs", "no_prior_turnout", "with_region")){
  out <- readRDS("temp/primary_out_data.rds")
  
  if(r == "no_covs"){
    out <- mutate(out,
                  coef = coef_no_covs,
                  l = l_no_covs,
                  u = u_no_covs)
    
  }else if(r == "no_prior_turnout"){
    out <- mutate(out,
                  coef = coef_no_prior,
                  l = l_no_prior,
                  u = u_no_prior)
  }else if(r == "with_region"){
    out <- mutate(out,
                  coef = coef_region,
                  l = l_region,
                  u = u_region)
  }
  different_dists <- ggplot(filter(out, estimate == "Robust", p <= 10),
                            aes(x = p, y = coef, ymin = l, ymax = u)) +
    geom_point() +
    geom_errorbar(width = 0) + 
    theme_bc(base_family = "LM Roman 10", base_size = 10,
             plot.margin = margin()) +
    geom_hline(yintercept = 0, linetype = "dashed") +
    scale_x_continuous(breaks = seq(0, 10, 2)) +
    scale_y_continuous(limits = c(-0.1, 0.2)) +
    labs(y = "Local average treatment effect", x = "Radius around shooting (miles)")
  different_dists
  
  saveRDS(different_dists, paste0("temp/different_dists_", r, ".rds"))
  
  fwrite(out |> 
           filter(estimate == "Robust") |> 
           select(distance = p,
                  estimate = coef,
                  lower_bound = l,
                  upper_bound = u), paste0("data_for_figures/figure_3_", r, ".csv"))
  
  for(i in outpaths){
    ggsave(paste0(i, r, "_results.png"), height = 4, width = 3, units = "in")
  }
}

#############
#############
############# create figure s17
out <- readRDS("temp/primary_out_data.rds")
different_dists <- ggplot(filter(out, estimate == "Robust"),
                          aes(x = p, y = coef, ymin = l, ymax = u)) +
  geom_point() +
  geom_errorbar(width = 0) + 
  theme_bc(base_family = "LM Roman 10", base_size = 10,
           plot.margin = margin()) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_x_continuous(breaks = seq(0, 20, 2)) +
  labs(y = "Local average treatment effect", x = "Radius around shooting (miles)")
different_dists

saveRDS(different_dists, "temp/different_dists_primary_20_miles.rds")

for(i in outpaths){
  ggsave(paste0(i, "primary_results_20_miles.png"), height = 4, width = 3, units = "in")
}


## create figure s9, s10
sample_sizes <- ggplot(filter(out, estimate == "Robust", p <= 10),
                       aes(x = p, y = n)) +
  geom_line() +
  geom_point() +
  theme_bc(base_family = "LM Roman 10") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(y = "Effective sample size", x = "Radius around shooting (miles)") +
  scale_y_continuous(labels = comma)
sample_sizes
saveRDS(sample_sizes, "temp/sample_size_plot.rds")
for(i in outpaths){
  ggsave(paste0(i, "primary_sample_size.png"), height = 4, width = 6, units = "in")
}

fwrite(out |> 
         filter(estimate == "Robust") |> 
         select(distance = p,
                observations = n), "data_for_figures/figure_A4.csv")

pops <- out |> 
  filter(estimate == "Robust",
         p <= 10) |> 
  select(p, population_treated, population_week) |> 
  pivot_longer(starts_with("pop")) |> 
  mutate(name = case_when(name == "population_treated" ~ "Population in Pre-Election Bandwidth",
                          T ~ "Population Treated in Week Before Election"),
         value2 = log(value))

sample_sizes <- ggplot(pops,
                       aes(x = p, y = value2)) +
  facet_grid(. ~ name) +
  geom_line() +
  geom_point() +
  theme_bc(base_family = "LM Roman 10") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(y = "Population", x = "Radius around shooting (miles)") +
  scale_y_continuous(labels = c("10^5", "10^6", "10^7", "10^8"),
                     breaks = c(log(100000), log(1000000), log(10000000), log(100000000)),
                     limits = c(log(100000), log(200000000)))
sample_sizes
saveRDS(sample_sizes, "temp/population_size_plot.rds")
for(i in outpaths){
  ggsave(paste0(i, "primary_population_size.png"), height = 4, width = 6, units = "in")
}

fwrite(pops, "data_for_figures/figure_A4_population.csv")



###########################
###########################
###########################
## read in half-mile observations
full_treat <- readRDS("temp/primary_half.rds")

l <- rdrobust(y = full_treat$turnout, x = full_treat$d2, p = 1, c = 0, cluster = full_treat$id,
              covs = select(full_treat,
                            nh_black, latino, nh_white, asian, median_income, median_age,
                            pop_dens, turnout_pre, t16, some_college))

pc <- rdpower::rdpower(cbind(full_treat$turnout, full_treat$d2), cutoff = 0, cluster = full_treat$id,
                       tau = 0.05,
                       p = 1,
                       covs = select(full_treat,
                                     nh_black, latino, nh_white, asian, median_income, median_age,
                                     pop_dens, turnout_pre, t16, some_college))
########################################
## deal with density

dens_issue <- rbindlist(lapply(c(-28:28), function(d){
  o <- rddensity(X = full_treat$d2, p = 1, c = d, h = l[["bws"]][1])
  
  return(data.table(day = d,
                    p_val = o[["test"]][["p_jk"]]))
})) |> 
  mutate(col = day == 0)
## figure s2
ggplot(dens_issue, aes(x = day, y = p_val, color = col)) +
  geom_line(color = "black") +
  scale_color_manual(values = c("black", "red"), guide = "none") +
  geom_point(aes(color = col)) +
  geom_hline(yintercept = 0.05, linetype = "dashed") +
  theme_bc() +
  labs(x = "Cut-point (days from election)",
       y = "P-value on density test") +
  scale_y_continuous(breaks = c(0, 0.05, 0.25, 0.5, 0.75, 1))

for(i in outpaths){
  ggsave(paste0(i, "diff_dens.png"), height = 4, width = 6, units = "in")
}

# figure s3
dens_issue_2 <- rbindlist(lapply(c(0:7), function(d){
  full_treat <- filter(full_treat, abs(d2) >= d)
  l <- rdrobust(y = full_treat$turnout, x = full_treat$d2, p = 1, c = 0, cluster = full_treat$id,
                covs = select(full_treat,
                              nh_black, latino, nh_white, asian, median_income, median_age,
                              pop_dens, turnout_pre, t16, some_college))
  return(data.table(day = d,
                    coef = l$coef,
                    u = l[["ci"]][,2],
                    l = l[["ci"]][,1]))
})) |> 
  mutate(col = day == 0)

dens_issue_2$estimate <- rep(c('Traditional','Bias-Adjusted','Robust'),nrow(dens_issue_2)/3)

dens_issue_2 <- filter(dens_issue_2, estimate == "Robust") |> 
  mutate(across(c(coef.Coeff, l, u), ~ . * -1))

ggplot(dens_issue_2,
             aes(x = day, y = coef.Coeff, ymin = l, ymax = u, color = col)) +
  geom_errorbar(width = 0, color = "black") +
  geom_point() +
  theme_bc(base_family = "LM Roman 10") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(y = "Local average treatment effect", x = "Size of donut around election day") +
  scale_color_manual(values = c("black", "red")) +
  theme(legend.position = "none",
        panel.spacing = unit(1, "lines"))

for(i in outpaths){
  ggsave(paste0(i, "donut_regs.png"), height = 4, width = 6, units = "in")
}

########################################
## run with different polynomials for Figure s8
out <- rbindlist(lapply(c(1:5), function(x){
  l <- rdrobust(y = full_treat$turnout, x = full_treat$d2, p = x, c = 0, cluster = full_treat$id,
                covs = select(full_treat,
                              nh_black, latino, nh_white, asian, median_income, median_age,
                              pop_dens, turnout_pre, t16, some_college))
  
  f <- tibble(coef = l$coef,
              se = l$se,
              n = l$N_h[1] + l$N_h[2],
              pv = l$pv,
              bwidth = l[["bws"]][1],
              p = x,
              u = l[["ci"]][,2],
              l = l[["ci"]][,1])
}))
saveRDS(out, "temp/alt_polys_data.rds")
out <- readRDS("temp/alt_polys_data.rds")
out$estimate <- rep(c('Traditional','Bias-Adjusted','Robust'),nrow(out)/3)
out$estimate <- factor(out$estimate, levels = c('Traditional','Bias-Adjusted','Robust'))
out <- mutate_at(out, vars(coef, l, u), ~. * -1)

out$z <- out$p == 1

dd <- ggplot(filter(out, estimate == "Robust"),
             aes(x = p, y = coef, ymin = l, ymax = u)) +
  geom_errorbar(width = 0) +
  geom_point(aes(color = z)) +
  theme_bc(base_family = "LM Roman 10") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(y = "Local average treatment effect", x = "Polynomial") +
  scale_color_manual(values = c("black", "red")) +
  theme(legend.position = "none")
dd
saveRDS(dd, "temp/diff_polys_primary.rds")

for(i in outpaths){
  ggsave(paste0(i, "diff_polys.png"), height = 4, width = 3, units = "in")
}

fwrite(filter(out, estimate == "Robust") |> 
         select(polynomial = p,
                estimate = coef,
                lower_bound = l,
                upper_bound = u), "data_for_figures/figure_A3.csv")

########################################
## run with different cutpoints for Figure s7
out <- rbindlist(lapply(c(-14:14), function(x){
  l <- rdrobust(y = full_treat$turnout, x = full_treat$d2, p = 1, c = x, cluster = full_treat$id,
                covs = select(full_treat,
                              nh_black, latino, nh_white, asian, median_income, median_age,
                              pop_dens, turnout_pre, t16, some_college)
  )
  
  f <- tibble(coef = l$coef,
              se = l$se,
              n = l$N_h[1] + l$N_h[2],
              bwidth = l[["bws"]][1],
              pv = l$pv,
              p = x,
              u = l[["ci"]][,2],
              l = l[["ci"]][,1])
}))
saveRDS(out, "temp/alt_cut_tab_data.rds")
out <- readRDS("temp/alt_cut_tab_data.rds")
out$estimate <- rep(c('Traditional','Bias-Adjusted','Robust'),nrow(out)/3)
out$estimate <- factor(out$estimate, levels = c('Traditional','Bias-Adjusted','Robust'))
out <- mutate_at(out, vars(coef, l, u), ~. * -1)

out$z <- out$p == 0

dd <- ggplot(filter(out, estimate == "Robust"),
             aes(x = p, y = coef, ymin = l, ymax = u)) +
  geom_errorbar(width = 0) +
  geom_point(aes(color = z)) +
  theme_bc(base_family = "LM Roman 10") +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(y = "Local average treatment effect", x = "Cut-point (days from election)") +
  scale_color_manual(values = c("black", "red")) +
  theme(legend.position = "none",
        panel.spacing = unit(1, "lines"))
dd
saveRDS(dd, "temp/placebos.rds")
for(i in outpaths){
  ggsave(paste0(i, "different_cutpoints.png"), height = 4, width = 3, units = "in")
}

fwrite(filter(out, estimate == "Robust") |> 
         select(day = p,
                estimate = coef,
                lower_bound = l,
                upper_bound = u), "data_for_figures/figure_A2.csv")

###########################

## create naive RDIT plot for Figure 2
j <- rdplot(y = full_treat$turnout, x = full_treat$d2, c = 0, p = 1,
            covs = select(full_treat,
                          nh_black, latino, nh_white, asian, median_income, median_age,
                          pop_dens, turnout_pre, t16, some_college)
)[["rdplot"]]

binned_dots_data <- j[["layers"]][[1]][["data"]] |> 
  select(x = rdplot_mean_bin, y = rdplot_mean_y)

left_line <- j[["layers"]][[2]][["data"]] |> 
  select(x = x_plot_l, y = y_hat_l)

right_line <- j[["layers"]][[3]][["data"]] |> 
  select(x = x_plot_r, y = y_hat_r)

all_dots = select(full_treat, x = d2, y = turnout)

j[["labels"]] <- j[["labels"]][-1]
j <- j +
  geom_point(aes(x = d2, y = turnout), data = full_treat, shape = 21,
             color = "transparent", fill = "gray", alpha = 0.5) +
  theme_bc(base_family = "LM Roman 10") +
  labs(x = "Days between mass shooting and election",
       y = "Turnout") +
  scale_y_continuous(labels = percent, limits = c(0.25, 0.75)) +
  scale_x_continuous(breaks = c(-180, -90, 0, 90, 180),
                     labels = c("Shooting occurs\n180 days\nbefore election", "90",
                                "Shooting occurs on\nelection day",
                                "90", "Shooting occurs\n180 days\nafter election"),
                     limits = c(-190, 190))
j

h <- j[["layers"]][[5]]

j[["layers"]][[5]] <- j[["layers"]][[1]]

j[["layers"]][[1]] <- h
j
saveRDS(j, "temp/rd_plot.rds")
for(i in outpaths){
  ggsave(paste0(i, "rd_plot.png"), height = 4, width = 6, units = "in")
}

fwrite(binned_dots_data, "data_for_figures/figure_2_blue_dots.csv")
fwrite(all_dots, "data_for_figures/figure_2_gray_dots.csv")
fwrite(left_line, "data_for_figures/figure_2_line_left_of_zero.csv")
fwrite(right_line, "data_for_figures/figure_2_line_right_of_zero.csv")

#################################
## create Table s2
full_treat$homicide <- as.numeric(full_treat$homicide)

vars <- c("nh_black", "latino", "nh_white", "asian", "median_income", "median_age",
          "pop_dens", "turnout_pre", "some_college", "av_temp", "cases", "share_group",
          "in_person_rate", "contact", "homicide")

vslim <- c("nh_black", "latino", "nh_white", "asian", "median_income", "median_age",
           "pop_dens", "turnout_pre", "some_college")

out1 <- rbindlist(lapply(vars, function(v){
  if(v %in% c("cases", "share_group")){
    full_treat <- filter(full_treat, !t16)
  }
  full_treat <- rename(full_treat, target := !!sym(v))
  
  cov <- vslim[vslim != v]
  
  p <- rdrobust(y = full_treat$target, x = full_treat$d2, p = 1, c = 0, cluster = full_treat$id,
                 covs = select(full_treat, all_of(cov)), bwselect = "cerrd")[["pv"]][[3]]
  return(data.table(var = v,
                    disc = p < 0.05))
}))

tr <- filter(full_treat, treated, abs(d2) < l$bws[[1]])
con <- filter(full_treat, !treated, abs(d2) < l$bws[[1]])
# 
# ks_outs <- rbindlist(lapply(vars, function(v){
#                                   
#                                   tr <- mutate(tr, target := !!sym(v)) |> 
#                                     filter(!is.na(target))
#                                   con <- mutate(con, target := !!sym(v)) |> 
#                                     filter(!is.na(target))
#                                   
#                                   return(data.table(name = v,
#                                                     ks_raw = ks_test(tr$target, con$target)[["p.value"]] < 0.05,
#                                                     
#                                                     t_raw = t.test(tr$target, con$target)[["p.value"]] < 0.05))
#                                 }))

## start with weighted means
demos <- rbind(tr, con) %>% 
  group_by(treated) %>% 
  summarize(across(vars, ~mean(., na.rm = T)),
            n = n_distinct(GEOID),
            n_shooting = n_distinct(id)) %>% 
  mutate(treated = ifelse(treated, "Treated", "Controls"))



## keep only untreated / not in data observations
f <- readRDS("temp/full_demos.rds") |> 
  filter(!(paste0(GEOID, year) %in% c(paste0(tr$GEOID, tr$year), paste0(con$GEOID, con$year)))) |> 
  mutate(turnout_pre = ballots_pre / cvap_pre,
         homicide = as.numeric(homicide)) %>% 
  select(vars, year, GEOID)

f <- f %>% 
  summarize(across(vars, ~ mean(ifelse(is.finite(.), ., NA), na.rm = T)),
            n = n_distinct(GEOID),
            n_shooting = 0) %>% 
  mutate(treated = "Not in Dataset")


demos <- bind_rows(demos, f) |> 
  mutate(cases = cases * 1000)


demos <- pivot_longer(demos, cols = c(vars, n, n_shooting))

demos <- pivot_wider(demos, id_cols = c("name"), names_from = "treated", values_from = "value")

demos <- select(demos, name, `Not in Dataset`,
                `Treated`, `Controls`)

demos <- #left_join(demos, ks_outs) |> 
  demos |> 
  left_join(out1, by = c("name" = "var")) |> 
  mutate(across(starts_with("ks"), ~ ifelse(is.na(.), F, .)),
         across(starts_with("t_"), ~ ifelse(is.na(.), F, .)),
         across(starts_with("disc"), ~ ifelse(is.na(.), F, .)))

###############################
##############################
vs <- fread("raw_data/var_orders.csv") |> 
  mutate(name = gsub("Killings", "Shootings", name))

demos <- left_join(rename(demos, variable = name), vs)

demos <- demos %>% 
  mutate_at(vars(`Not in Dataset`,
                 `Treated`, `Controls`),
            ~ ifelse(name == "Median Income", dollar(round(as.numeric(gsub(",", "", .)))), .)) %>% 
  mutate_at(vars(`Not in Dataset`,
                 `Treated`, `Controls`),
            ~ ifelse(name %in% c("Median Age", "Average Annual Temperature",
                                 "Covid Cases per 1k Residents",
                                 "Homicide Rate (Per 100k Residents)"), round(as.numeric(.), 1), .)) %>% 
  mutate_at(vars(`Not in Dataset`,
                 `Treated`, `Controls`),
            ~ ifelse(name %in% c("Population Density (Residents per Square Mile)", "Number of Block Groups",
                                 "Number of Killings"), comma(as.numeric(.), accuracy = 1), .)) %>% 
  mutate_at(vars(`Not in Dataset`,
                 `Treated`, `Controls`),
            ~ ifelse(substring(name, 1, 1) == "%" |
                       name %in% c("Previous Turnout", "% of Ballots Cast In-Person on Election Day"), percent(as.numeric(.), accuracy = .1), .)) |> 
  mutate(`Controls` = ifelse(disc, paste0(`Controls`, "*"), `Controls`),
         # `Controls` = ifelse(t_raw, paste0(`Controls`, "†"), `Controls`),
         # `Controls` = ifelse(ks_raw, paste0(`Controls`, "‡"), `Controls`)
         ) |> 
  select(-starts_with("t_"), -starts_with("ks_"), -starts_with("disc"))


demos <- demos %>% 
  arrange(order) %>% 
  ungroup() %>% 
  select(-order, -variable) %>%
  select(Variable = name, everything())

saveRDS(demos, "temp/demo_table_half_mile.rds")

for(out in outpaths_tabs){
  
  kable(demos, "latex", caption = "\\label{tab:balance} Demographics of Block Groups with Mass Shootings (0.5 miles)",
        linesep = "", align = c("l", rep("c", 3)),
        booktabs = T, escape = T) |> 
    column_spec(1, width = "6cm") |> 
    column_spec(c(2:3), width = "3cm") |> 
    kable_styling(latex_options = c("striped")) |> 
    row_spec(9, hline_after = TRUE) |> 
    row_spec(15, hline_after = TRUE) |> 
    footnote(general = c("* Variable discontinuous at cut-point (p < 0.05 using \\\\texttt{rdrobust()}).",
                         # "† Mean different from treated block groups (t-test, p < 0.05).",
                         # "‡ Distribution different from treated block groups (Kolmogorov–", "Smirnov test, p < 0.05).",
                         "Covid and group quarter measures only tested for 2020."),
             escape = F) |> 
    save_kable(paste0(out, "/balance.tex"))
  
}

###############################
###############################
###############################
###############################

# create table s1

primary <- readRDS("temp/primary_out_data.rds") |> 
  filter(p <= 10)
primary$estimate <- rep(c('Traditional','Bias-Adjusted','Robust'),nrow(primary)/3)

primary <- primary %>% 
  mutate(cint = paste0("[", round(u, digits = 3), ", ", round(l, digits = 3), "]"),
         across(c(coef, pv, se, n), ~round(., digits = 3)),
         n = comma(n, accuracy = 1),
         bw = floor(bw))

primary$`Threshold (Miles)` <- min(primary$p)
primary$Bandwidth <- primary$bw[1]
primary$`Effective Sample Size` <- primary$n[1]

for(i in c(2:nrow(primary))){
  primary$`Threshold (Miles)`[i] <- ifelse(primary$p[i] == primary$p[i-1], "", primary$p[i])
  primary$Bandwidth[i] <- ifelse(primary$p[i] == primary$p[i-1], "", primary$bw[i])
  primary$`Effective Sample Size`[i] <- ifelse(primary$p[i] == primary$p[i-1], "", primary$n[i])
}

primary <- primary |> 
  mutate(`Effective Sample Size` = ifelse(estimate == "Bias-Adjusted",
                                          paste0("[", comma(n_pre), ", ", comma(n_post), "]"),
                                          `Effective Sample Size`))

primary <- select(primary, `Threshold (Miles)`,
                  `Effective Sample Size`,
                  `Band-\nwidth` = Bandwidth,
                  Type = estimate,
                  `LATE (Turnout)` = coef,
                  `p-value` = pv,
                  `Confidence\nInterval` = cint,
                  `Std.\nError` = se)


knitr::kable(primary, "latex", caption = "\\label{tab:big-tab} RDiT Outcomes, Primary Model",
             linesep = "",
             align = rep('c', ncol(primary)), booktabs = T, espcape = T) %>%
  kable_styling(font_size = 10,
                latex_options = c("HOLD_position", "repeat_header", "striped")) %>%
  column_spec(c(1), width = "1.6cm") %>%
  column_spec(c(2), width = "2.3cm") %>% 
  column_spec(c(3, 5), width = "1cm") %>%
  column_spec(c(7), width = "3cm") %>%
  column_spec(c(4), width = "2.25cm") %>% 
  column_spec(c(6), width = "1.75cm") %>% 
  save_kable(paste0(outpaths_tabs[2], "primary.tex"))

kable(demos, "latex", caption = "\\label{tab:balance} Demographics of Block Groups with Mass Shootings (0.5 miles)",
      linesep = "", align = c("l", rep("c", 3)),
      booktabs = T, escape = T) |> 
  column_spec(1, width = "6cm") |> 
  column_spec(c(2:3), width = "3cm") |> 
  kable_styling(latex_options = c("striped")) |> 
  row_spec(9, hline_after = TRUE) |> 
  row_spec(15, hline_after = TRUE) |> 
  footnote(general = c("* Variable discontinuous at cut-point (p < 0.05 using \\\\texttt{rdrobust()}).",
                       "† Mean different from treated block groups (t-test, p < 0.05).",
                       "‡ Distribution different from treated block groups (Kolmogorov–", "Smirnov test, p < 0.05).",
                       "Covid and group quarter measures only tested for 2020."),
           escape = F) |> 
  save_kable(paste0(out, "/balance.tex"))